In [52]:
import mysql.connector
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score,confusion_matrix
In [2]:
con = mysql.connector.connect(
    host = "localhost",
    user = "root",
    password = "Riddhi@2105",
    database = "inventory"
)
In [3]:
df = pd.read_sql("SELECT id, name, sku, category, brand, quantity, price, cost_price, profit_margin, warehouse_id, supplier_id, reorder_level, damaged_units, gender, expiry_date, created_at FROM inventory",con);
C:\Users\rp301\AppData\Local\Temp\ipykernel_9912\1314326230.py:1: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.
  df = pd.read_sql("SELECT id, name, sku, category, brand, quantity, price, cost_price, profit_margin, warehouse_id, supplier_id, reorder_level, damaged_units, gender, expiry_date, created_at FROM inventory",con);
In [4]:
df.head()
Out[4]:
id name sku category brand quantity price cost_price profit_margin warehouse_id supplier_id reorder_level damaged_units gender expiry_date created_at
0 1 0 SKU-Y9DKK8 Grocery SupplyCo 108 2957.93 2086.88 33.80 3 12 129 7 Female 2023-01-15 2021-02-02
1 2 1 SKU-GOLA8P Hardware GlobalTech 5 2283.99 1810.70 16.56 5 22 49 15 Male 2023-05-24 2020-02-24
2 3 2 SKU-G2ZGSE Electronics SupplyCo 353 2494.71 1662.74 21.84 5 5 17 6 Male 2022-05-13 2022-02-20
3 4 3 SKU-AHJ8SQ Electronics SupplyCo 73 343.22 2919.88 22.15 8 1 122 14 Female 2021-12-12 2020-12-27
4 5 4 SKU-61ANLS Grocery SupplyCo 21 3987.21 130.33 23.87 4 18 81 11 Male 2021-12-26 2022-05-26
In [5]:
df.tail()
Out[5]:
id name sku category brand quantity price cost_price profit_margin warehouse_id supplier_id reorder_level damaged_units gender expiry_date created_at
1495 1496 1495 SKU-BOEAFZ Grocery BrandY 29 630.84 2597.12 15.99 4 6 60 13 Male 2020-12-12 2022-01-30
1496 1497 1496 SKU-GFU912 Grocery GlobalTech 363 4665.67 2836.87 26.49 5 3 84 9 Male 2021-07-28 2023-06-07
1497 1498 1497 SKU-SZIP99 Stationery BrandX 223 1222.33 2329.88 35.30 1 29 64 20 Female 2021-03-22 2020-12-27
1498 1499 1498 SKU-6DFQX3 Stationery BrandZ 35 343.03 2041.12 20.45 1 16 125 18 Female 2021-01-23 2022-07-23
1499 1500 1499 SKU-VKFSOC Cosmetics GlobalTech 166 721.46 615.17 16.20 10 21 109 6 Female 2020-08-27 2023-07-27
In [6]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500 entries, 0 to 1499
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             1500 non-null   int64  
 1   name           1500 non-null   object 
 2   sku            1500 non-null   object 
 3   category       1500 non-null   object 
 4   brand          1500 non-null   object 
 5   quantity       1500 non-null   int64  
 6   price          1500 non-null   float64
 7   cost_price     1500 non-null   float64
 8   profit_margin  1500 non-null   float64
 9   warehouse_id   1500 non-null   int64  
 10  supplier_id    1500 non-null   int64  
 11  reorder_level  1500 non-null   int64  
 12  damaged_units  1500 non-null   int64  
 13  gender         1500 non-null   object 
 14  expiry_date    1500 non-null   object 
 15  created_at     1500 non-null   object 
dtypes: float64(3), int64(6), object(7)
memory usage: 187.6+ KB
In [7]:
df.isnull().sum()
Out[7]:
id               0
name             0
sku              0
category         0
brand            0
quantity         0
price            0
cost_price       0
profit_margin    0
warehouse_id     0
supplier_id      0
reorder_level    0
damaged_units    0
gender           0
expiry_date      0
created_at       0
dtype: int64
In [8]:
target_encoder = LabelEncoder()
In [9]:
target_encoder
Out[9]:
LabelEncoder()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LabelEncoder()
In [10]:
df["gender"] = target_encoder.fit_transform(df["gender"])
In [11]:
le = LabelEncoder()
le
Out[11]:
LabelEncoder()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LabelEncoder()
In [12]:
df["sku"] = le.fit_transform(df["sku"])
In [13]:
df["category"] = le.fit_transform(df["category"])
In [14]:
df["expiry_date"] = le.fit_transform(df["expiry_date"])
In [15]:
df["created_at"] = le.fit_transform(df["created_at"])
In [16]:
df["brand"] = le.fit_transform(df["brand"])
In [17]:
df["gender"]
Out[17]:
0       0
1       1
2       1
3       0
4       1
       ..
1495    1
1496    1
1497    0
1498    0
1499    0
Name: gender, Length: 1500, dtype: int32
In [18]:
df["sku"]
Out[18]:
0       1428
1        711
2        683
3        433
4        254
        ... 
1495     483
1496     706
1497    1214
1498     268
1499    1315
Name: sku, Length: 1500, dtype: int32
In [19]:
df["category"]
Out[19]:
0       3
1       4
2       2
3       2
4       3
       ..
1495    3
1496    3
1497    5
1498    5
1499    1
Name: category, Length: 1500, dtype: int32
In [20]:
df["expiry_date"]
Out[20]:
0       589
1       675
2       424
3       328
4       337
       ... 
1495     95
1496    243
1497    161
1498    124
1499     22
Name: expiry_date, Length: 1500, dtype: int32
In [21]:
df["created_at"]
Out[21]:
0       234
1        29
2       475
3       210
4       538
       ... 
1495    461
1496    781
1497    210
1498    582
1499    811
Name: created_at, Length: 1500, dtype: int32
In [22]:
X = df.drop("gender",axis = 1)
In [23]:
X
Out[23]:
id name sku category brand quantity price cost_price profit_margin warehouse_id supplier_id reorder_level damaged_units expiry_date created_at
0 1 0 1428 3 4 108 2957.93 2086.88 33.80 3 12 129 7 589 234
1 2 1 711 4 3 5 2283.99 1810.70 16.56 5 22 49 15 675 29
2 3 2 683 2 4 353 2494.71 1662.74 21.84 5 5 17 6 424 475
3 4 3 433 2 4 73 343.22 2919.88 22.15 8 1 122 14 328 210
4 5 4 254 3 4 21 3987.21 130.33 23.87 4 18 81 11 337 538
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1495 1496 1495 483 3 1 29 630.84 2597.12 15.99 4 6 60 13 95 461
1496 1497 1496 706 3 3 363 4665.67 2836.87 26.49 5 3 84 9 243 781
1497 1498 1497 1214 5 0 223 1222.33 2329.88 35.30 1 29 64 20 161 210
1498 1499 1498 268 5 2 35 343.03 2041.12 20.45 1 16 125 18 124 582
1499 1500 1499 1315 1 3 166 721.46 615.17 16.20 10 21 109 6 22 811

1500 rows × 15 columns

In [24]:
y = df["gender"]
In [25]:
y
Out[25]:
0       0
1       1
2       1
3       0
4       1
       ..
1495    1
1496    1
1497    0
1498    0
1499    0
Name: gender, Length: 1500, dtype: int32
In [26]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2,random_state = 42)
In [27]:
X_train
Out[27]:
id name sku category brand quantity price cost_price profit_margin warehouse_id supplier_id reorder_level damaged_units expiry_date created_at
382 383 382 1033 1 2 450 3296.78 1331.37 38.27 6 30 13 3 896 342
538 539 538 1311 1 2 296 3874.62 1428.85 27.58 6 3 51 10 125 682
1493 1494 1493 612 0 0 488 4522.45 679.71 16.78 10 29 47 5 378 255
1112 1113 1112 771 0 2 272 2190.32 476.00 5.96 4 25 77 11 13 244
324 325 324 324 0 4 3 2354.73 224.33 12.72 2 22 110 15 183 43
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1130 1131 1130 614 0 0 424 3440.37 635.41 28.80 6 2 94 3 907 545
1294 1295 1294 625 5 2 464 878.77 1529.35 6.62 1 9 118 6 43 411
860 861 860 757 4 2 90 4104.86 2403.59 28.93 1 21 137 5 112 60
1459 1460 1459 44 1 3 338 1877.36 159.50 17.04 7 30 88 20 664 769
1126 1127 1126 434 0 4 352 3317.59 1235.46 29.87 9 26 28 15 414 589

1200 rows × 15 columns

In [28]:
X_test
Out[28]:
id name sku category brand quantity price cost_price profit_margin warehouse_id supplier_id reorder_level damaged_units expiry_date created_at
1116 1117 1116 1469 0 1 50 4848.39 566.86 15.38 10 15 60 19 208 746
1368 1369 1368 1270 0 3 382 4811.31 406.38 12.76 5 24 137 5 552 289
422 423 422 349 3 3 265 649.57 2519.33 16.91 2 12 16 18 461 538
413 414 413 1267 5 2 366 2280.60 1193.83 25.85 10 3 94 9 68 935
451 452 451 1078 0 0 267 2248.81 2770.25 7.40 8 19 11 19 751 335
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
983 984 983 1120 4 3 104 2727.62 664.39 26.81 8 11 149 7 217 193
799 800 799 747 2 0 183 907.79 742.73 9.11 8 14 28 12 452 131
1265 1266 1265 846 0 1 54 1963.80 2055.25 20.18 6 26 54 0 130 883
1150 1151 1150 1365 0 3 326 86.80 1288.89 39.20 10 5 66 19 903 680
824 825 824 1224 4 4 249 3039.67 1313.87 11.10 7 17 104 1 467 303

300 rows × 15 columns

In [29]:
y_train
Out[29]:
382     0
538     0
1493    0
1112    0
324     0
       ..
1130    0
1294    0
860     1
1459    1
1126    1
Name: gender, Length: 1200, dtype: int32
In [30]:
y_test
Out[30]:
1116    1
1368    1
422     1
413     0
451     1
       ..
983     0
799     1
1265    1
1150    0
824     1
Name: gender, Length: 300, dtype: int32
In [31]:
model = LogisticRegression(max_iter = 2000)
model.fit(X_train,y_train)
C:\Users\rp301\anaconda3\Lib\site-packages\sklearn\linear_model\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Out[31]:
LogisticRegression(max_iter=2000)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression(max_iter=2000)
In [32]:
import plotly.express as px
In [33]:
px.line(x= df["brand"],y=df["gender"]).show()
In [34]:
import seaborn as sns
In [35]:
px.pie(values=df["quantity"],names=df["category"]).show()
In [51]:
sns.violinplot(data=df["price"])
plt.show()
No description has been provided for this image
In [53]:
y_pred = model.predict(X_test)
In [54]:
y_pred
Out[54]:
array([0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1,
       1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0,
       1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1,
       1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1,
       0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0,
       1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0,
       1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
       0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1])
In [55]:
print(accuracy_score(y_test,y_pred))
0.47333333333333333
In [59]:
cm = confusion_matrix(y_test,y_pred)
In [60]:
cm
Out[60]:
array([[53, 99],
       [59, 89]], dtype=int64)
In [92]:
sns.heatmap(cm,annot = True,cmap = "cividis")
plt.xlabel("Acutal")
plt.ylabel("Predict")
plt.title("Acutal Vs Predict")
plt.show()
No description has been provided for this image
In [ ]: